﻿/*  
cd /projects/hsieh_project/proj_201809/code_1_data/
qsas data_1_mkt_ind_sum.sas 5 &
*/

/* Library reference for the project data directory. */
libname hr "/projects/hsieh_project/proj_201809/data/";

/* Output directory used by the export step at the end of this program. */
%let dir_out = /projects/hsieh_project/proj_201809/data/;


/*
================================================================================
Load Data
*/

/* Bring in the reader macro; /source2 echoes the included source to the log. */
%include "/projects/hsieh_project/proj_201809/code_1_data/m_read.sas" /source2;

/*
Invoke the reader. The year list is wrapped in %quote so that its commas are
not taken as parameter separators.
NOTE(review): the rest of this program reads work.lbd, which is presumably
created by m_read -- confirm against m_read.sas.
*/
%m_read(
  param_dev=1,
  param_lyear=%quote(1977,1982,1987,1992,1997,2001,2002,2007,2013,2014,2016),
  param_czone=1,
  param_msa1983=1,
  param_msacz=1,
  param_drop=1);

/*
================================================================================
Macro that calculates total number of markets by industry
*/

%include "/projects/hsieh_project/code_0_general/m_perc_by_var.sas" /source2;

/*
--------------------------------------------------------------------------------
n_var_by_indf(ivar=, ivaro=)

For every year x industry (ch_ind) cell, counts the distinct firm x ivar units
found in work.lbd, both in total and restricted to firms whose mkt_perc is at
or below each percentile cutoff, and adds the counts as columns to work.dt_out.

Parameters
  ivar   Variable in lbd that identifies a market (e.g. fips, msa, zipcode).
         Leave empty to count firms themselves.
  ivaro  Name of the output count variable. Cutoff-specific counts are named
         ivaro_cutoff, e.g. n_msa_10.

Reads    work.lbd (needs year, ch_ind, firmnum and the ivar variable).
Writes   work.dt_out: created on the first call, merged by year ch_ind on
         later calls. Scratch data sets dt_var, dt_perc, dt_indf and dti_out
         are overwritten on every call.
Requires the macro m_perc_by_var to be compiled before this macro is invoked.
*/
%macro n_var_by_indf(ivar= , ivaro= );

  /* Keep working macro variables out of the global / caller symbol tables. */
  %local vfirst l_perc l_ind_var i_list i_perc;

  %put --------------------------------------------------------------------------------;
  %put &ivar.;

  /*
  ------------------------------------------------------------------------------
  Prepare dt_var (= working copy of lbd used inside the macro)
  */

  data dt_var;
    set lbd;
    %if "&ivar." = "zipcode" %then %do;
      /* Remove missing zip codes, then keep only the first five characters */
      if zipcode = "" then delete;
      zipcode = substr(zipcode,1,5);
    %end;
  run;

  /*
  ------------------------------------------------------------------------------
  Determine the basic unit to count
  e.g.
  1) ivar = "":     basic unit is ind-firm
  2) ivar = "fips": basic unit is ind-firm-fips
  */

  %if "&ivar." = "" %then %let vfirst = firmnum;
  %else %let vfirst = &ivar.;

  proc sort data=dt_var;
    by year ch_ind firmnum &ivar.;
  run;

  /* dt_var arrives at establishment level; keep one row per basic unit. */
  data dt_var;
    set dt_var;
    by year ch_ind firmnum &ivar.;
    if first.&vfirst.;  /* subsetting IF: first row of each year-ch_ind-firmnum-ivar group */
    &ivaro. = 1;        /* each surviving row counts as one unit */
  run;

  %put ----------------------------------------;
  %put Define Top Firms;

  /*
  ------------------------------------------------------------------------------
  Define top firms based on markets (note that this differs from the
  employment based definition)
  */

  /* Explicit RUN so the sort has finished before m_perc_by_var starts. */
  proc sort data=dt_var;
    by year ch_ind firmnum &ivar.;
  run;

  %m_perc_by_var(ds_in=dt_var, ds_out=dt_perc, var_in=&ivaro., var_by=%bquote(year ch_ind), var_unit=firmnum, var_out=mkt, var_runif=runif);

  proc sort data=dt_var;  by year ch_ind firmnum; run;
  proc sort data=dt_perc; by year ch_ind firmnum; run;

  %put ----------------------------------------;
  %put Count Markets;

  /*
  ------------------------------------------------------------------------------
  Attach each firm's percentile to its rows and flag top firms.

  dt_var has one row per basic unit; dt_perc is expected to have one row per
  year-industry-firm carrying mkt_perc (the variable tested below).
  NOTE(review): any non-key variable present in both data sets is taken from
  dt_perc on the first row of each BY group -- confirm that m_perc_by_var does
  not output a variable named like ivaro.

  All cutoff flags are created in this single pass. A missing mkt_perc is
  never flagged: SAS sorts missing below every number, so an unguarded
  "mkt_perc <= cutoff" would count unmatched firms in every top group.
  */

  %let l_perc    = 1 10 20 30 40 50 60 70 80 90;
  %let l_ind_var = &ivaro.;

  data dt_indf;
    merge dt_var dt_perc;
    by year ch_ind firmnum;
    %do i_list = 1 %to %sysfunc(countw(&l_perc., %str( )));
      %let i_perc = %scan(&l_perc., &i_list., %str( ));
      %put Top &i_perc.% Industry Firms;
      &ivaro._&i_perc. = (not missing(mkt_perc) and mkt_perc <= %sysevalf(&i_perc. / 100));
      %let l_ind_var = &l_ind_var. &ivaro._&i_perc.;
    %end;
  run;

  %put List of Variables for Top Firms;
  %put &l_ind_var.;

  /*
  ------------------------------------------------------------------------------
  Aggregate to year-industry level
  */

  proc sort data=dt_indf;
    by year ch_ind;
  run;

  /* Summing the 0/1 indicators gives the unit counts per year-industry. */
  proc means data=dt_indf noprint;
    by year ch_ind;
    output out=dti_out(drop=_type_ _freq_) sum(&l_ind_var.)=&l_ind_var.;
  run;

  /* Append the new columns to the running result, or start it. */
  %if %sysfunc(exist(dt_out)) %then %do;
    data dt_out;
      merge dt_out dti_out;
      by year ch_ind;
    run;
  %end;
  %else %do;
    data dt_out;
      set dti_out;
    run;
  %end;

%mend n_var_by_indf;

/*
================================================================================
Run the macro
*/

/*
n_var_by_indf appends to work.dt_out whenever that data set already exists.
Drop any copy left over from an earlier run in the same SAS session so the
result is built from scratch; NOWARN keeps the log clean when there is
nothing to delete (the normal case in a fresh batch run).
*/
proc datasets lib=work nolist nowarn;
  delete dt_out;
quit;

/* One call per market definition; each call adds its count columns to dt_out. */
%n_var_by_indf(ivar= , ivaro=n_firm);
%n_var_by_indf(ivar=lbdid, ivaro=n_est);
%n_var_by_indf(ivar=zipcode, ivaro=n_zip);
%n_var_by_indf(ivar=fips, ivaro=n_fips);
%n_var_by_indf(ivar=msa, ivaro=n_msa);
%n_var_by_indf(ivar=czone, ivaro=n_czone);
%n_var_by_indf(ivar=msa1983, ivaro=n_msa1983);
%n_var_by_indf(ivar=msa1983cz, ivaro=n_msa1983cz);

/*
================================================================================
Export
*/

/* Write the accumulated year-industry table to disk, replacing any existing file. */
proc export
  outfile="&dir_out./mkt_ind_sum.dta"
  data=dt_out
  replace;
run;

/* End of SAS file */
